package com.cmge.ad.spider.article.naojing;

import java.util.ArrayList;
import java.util.List;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import com.cmge.ad.model.Article;
import com.cmge.ad.spider.pipeline.MysqlArticlePipeline;

/**
 * @desc	Page processor for the brain-teaser list pages of 2345.com
 * 			(marked by the original author as already crawled), e.g.
 * 			http://www.2345.com/jzw/74.htm
 * @author	ljt
 * @time	2014-12-29 11:16:05 AM
 */
public class NaoJingPageProcessor implements PageProcessor {

    public static final String URL_LIST = "jzw/\\w+";

    /** Highest list page number that will be requested. */
    private static final int MAX_PAGE = 74;

    /** Page number assumed when the current URL does not match {@code <n>.htm}. */
    private static final int FIRST_PAGE = 1;

    /** Value written to {@code Article.source} for every article built here. */
    private static final String SOURCE_TAG = "2345_naojing";

    /**
     * Value written to {@code Article.type} and to the "type" result field.
     * NOTE(review): the meaning of type 1 is defined outside this file - confirm.
     */
    private static final int ARTICLE_TYPE = 1;

    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /**
     * Builds one {@link Article} per list item found under
     * {@code ul#J_listTable} and, while the current page number is below
     * {@link #MAX_PAGE}, queues the next list page.
     *
     * <p>List items whose answer cannot be extracted are skipped instead of
     * aborting the whole page, so a single malformed item no longer prevents
     * the remaining items and the next-page request from being handled.
     *
     * @param page the downloaded list page
     */
    @Override
    public void process(Page page) {
        // Collect every list item on the current page.
        List<Selectable> items = page.getHtml().xpath("//ul[@id='J_listTable']//li").nodes();
        if (null != items && items.size() > 0) {
            List<Article> articleList = new ArrayList<Article>();
            for (Selectable item : items) {
                String content = item.xpath("//span[@class='table_left']/text()").get();
                String onclick = item.xpath("//a[@class='answer']/@onclick").get();
                String answer = extractAnswer(onclick);
                if (answer == null) {
                    System.err.println("skipping list item without a parsable answer, onclick=" + onclick);
                    continue;
                }
                Article article = new Article();
                article.setContent(content);
                article.setSource(SOURCE_TAG);
                article.setKey(answer);
                article.setType(ARTICLE_TYPE);
                article.setMinImageUrl("");
                article.setMaxImageUrl("");
                articleList.add(article);
            }
            page.putField("articleList", articleList);
            page.putField("type", ARTICLE_TYPE);
        }

        // Current page number, taken from the URL.
        int current = parsePageNumber(page.getUrl().get());

        System.out.println("current is :" + current);

        if (current < MAX_PAGE) {
            page.addTargetRequest("http://www.2345.com/jzw/" + (current + 1) + ".htm");
        }
    }

    /**
     * Extracts the answer text from the {@code onclick} attribute of the
     * answer link: everything after the first full-width colon (or from the
     * start when there is none) up to, but excluding, the last single quote.
     *
     * @param onclick raw attribute value, may be {@code null}
     * @return the answer text, or {@code null} when the attribute is missing
     *         or does not contain a closing single quote after the start
     */
    private static String extractAnswer(String onclick) {
        if (onclick == null) {
            return null;
        }
        int start = onclick.indexOf("：") + 1;
        int end = onclick.lastIndexOf("'");
        if (end < start) {
            // Also covers end == -1, because start is never negative.
            return null;
        }
        return onclick.substring(start, end);
    }

    /**
     * Reads the page number from a URL of the form {@code .../<n>.htm}.
     *
     * @param url the URL of the current page, may be {@code null}
     * @return the parsed page number, or {@link #FIRST_PAGE} when the URL
     *         does not have the expected form
     */
    private static int parsePageNumber(String url) {
        if (url != null) {
            int start = url.lastIndexOf("/") + 1;
            int end = url.lastIndexOf(".htm");
            if (end >= start) {
                try {
                    return Integer.parseInt(url.substring(start, end));
                } catch (NumberFormatException e) {
                    // Fall through to the shared fallback below.
                }
            }
        }
        System.err.println("cannot read page number from url, assuming page " + FIRST_PAGE + ": " + url);
        return FIRST_PAGE;
    }

    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Starts a single-threaded crawl at the first list page and stores the
     * results through {@link MysqlArticlePipeline}.
     *
     * @param args unused
     * @throws Exception declared for callers; nothing in this method throws it explicitly
     */
    public static void main(String[] args) throws Exception {
        // RedisPipeline, JsonFilePipeline and JsonPipeline were previously
        // listed here as commented-out alternatives to the MySQL pipeline.
        Spider qsSpider = Spider.create(new NaoJingPageProcessor())
                .addUrl("http://www.2345.com/jzw/1.htm")
                .addPipeline(new MysqlArticlePipeline())
                .thread(1);
        qsSpider.start();
    }
}
